# coding=utf-8
"""
    Matrix factorization model for item prediction (ranking) optimized using BPR (BPRMF)
    [Item Recommendation (Ranking)]

    Literature:
        Steffen Rendle, Christoph Freudenthaler, Zeno Gantner, Lars Schmidt-Thieme:
        BPR: Bayesian Personalized Ranking from Implicit Feedback.
        UAI 2009.
        http://www.ismll.uni-hildesheim.de/pub/pdfs/Rendle_et_al2009-Bayesian_Personalized_Ranking.pdf

"""

# © 2018. Case Recommender (MIT License)

import random
import numpy as np

from caserec.recommenders.item_recommendation.base_item_recommendation import BaseItemRecommendation
from caserec.utils.extra_functions import timed

__author__ = 'removed for double blind review'


class BprMF(BaseItemRecommendation):
    """
    Matrix factorization recommender for item ranking, trained with the BPR pairwise criterion.

    Each user and each item gets a latent vector (rows of ``self.p`` and ``self.q``) and each item a scalar
    bias. Training repeatedly samples (user, known item, unknown item) triples and nudges the parameters so
    that the known item scores above the unknown one.

    """

    def __init__(self, train_file=None, test_file=None, output_file=None, factors=10, learn_rate=0.05, epochs=30,
                 batch_size=0, rank_length=10, init_mean=0, init_stdev=0.1, reg_u=0.0025, reg_i=0.0025,
                 reg_j=0.00025, reg_bias=0, sep='\t', output_sep='\t', random_seed=None, items_test=False):
        """
        BPRMF for Item Recommendation

         BPR reduces ranking to pairwise classification. The different variants (settings) of this recommender
         roughly optimize the area under the ROC curve (AUC).

        Usage::

            >> BprMF(train, test).compute()
            >> BprMF(train, test, batch_size=30).compute()

        :param train_file: File which contains the train set. This file needs to have at least 3 columns
        (user item feedback_value).
        :type train_file: str

        :param test_file: File which contains the test set. This file needs to have at least 3 columns
        (user item feedback_value).
        :type test_file: str, default None

        :param output_file: File with dir to write the final predictions
        :type output_file: str, default None

        :param factors: Number of latent factors per user/item
        :type factors: int, default 10

        :param learn_rate: Learning rate (alpha)
        :type learn_rate: float, default 0.05

        :param epochs: Number of epochs over the training data
        :type epochs: int, default 30

        :param batch_size: Reduces the number of sampled interactions in each epoch. If 0 (or negative), the number
        of positive interactions in the train set is used; otherwise that number is divided by batch_size.
        :type batch_size: int, default 0

        :param rank_length: Size of the rank that must be generated by the predictions of the recommender algorithm
        :type rank_length: int, default 10

        :param init_mean: Mean of the normal distribution used to initialize the latent factors
        :type init_mean: float, default 0

        :param init_stdev: Standard deviation of the normal distribution used to initialize the latent factors
        :type init_stdev: float, default 0.1

        :param reg_u: Regularization parameter for user factors
        :type reg_u: float, default 0.0025

        :param reg_i: Regularization parameter for positive item factors
        :type reg_i: float, default 0.0025

        :param reg_j: Regularization parameter for negative item factors
        :type reg_j: float, default 0.00025

        :param reg_bias: Regularization parameter for the bias term
        :type reg_bias: default 0

        :param sep: Delimiter for input files
        :type sep: str, default '\t'

        :param output_sep: Delimiter for output file
        :type output_sep: str, default '\t'

        :param random_seed: Seed value. Locks random numbers for reproducibility of experiments.
        :type random_seed: int, default None

        :param items_test: If True, rebuild the unobserved item set of each user from the full item list
        (presumably so that items appearing only in the test set become candidates; confirm against the base class)
        :type items_test: bool, default False

        """

        super(BprMF, self).__init__(train_file=train_file, test_file=test_file, output_file=output_file,
                                    rank_length=rank_length, sep=sep, output_sep=output_sep)

        self.recommender_name = 'BPRMF'

        # Hyper-parameters
        self.factors = factors
        self.learn_rate = learn_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.init_mean = init_mean
        self.init_stdev = init_stdev
        self.reg_bias = reg_bias
        self.reg_u = reg_u
        self.reg_i = reg_i
        self.reg_j = reg_j
        self.items_test = items_test

        # Seed both generators: numpy draws the initial factors, `random` draws the training samples.
        # NOTE: this seeds the process-wide generators, not private ones.
        if random_seed is not None:
            np.random.seed(random_seed)
            random.seed(random_seed)

        # Internal state, filled in by init_model() / create_factors()
        self.p = None  # user factor matrix, one row per user
        self.q = None  # item factor matrix, one row per item
        self.bias = None  # item bias vector
        self.num_interactions = None  # number of sampled updates per epoch

    def init_model(self):
        """
        Method to treat and initialize the model: optionally rebuilds the unobserved item sets, creates the
        factors and defines how many updates are sampled in each epoch.

        """

        # Upgrade unobserved items with test set samples
        if self.items_test:
            all_items = set(self.items)
            seen_by_user = self.train_set['items_seen_by_user']
            for user in self.users:
                # .get(): a user without any training interaction has seen nothing, so every item is
                # unobserved (a plain [] lookup would raise KeyError for such a user).
                self.train_set['items_unobserved'][user] = list(all_items - set(seen_by_user.get(user, ())))

        # Initialize factors
        self.create_factors()

        # Define number of interactions in each epoch
        if self.batch_size <= 0:
            self.num_interactions = self.train_set['number_interactions']
        else:
            self.num_interactions = int(self.train_set['number_interactions'] / self.batch_size) + 1

    def fit(self):
        """
        This method performs iterations of stochastic gradient ascent over the training data. In each epoch,
        ``self.num_interactions`` users are drawn (with replacement) from the training users; for every drawn user
        one (known item, unknown item) pair is sampled and used for a single update (see init_model for how
        ``num_interactions`` is derived from the batch size).

        """

        for _ in range(self.epochs):
            random_users = random.choices(self.train_set['users'], k=self.num_interactions)
            for user in random_users:
                # A user who interacted with every item offers no unknown item to contrast with;
                # skip instead of letting random.choice fail on an empty sequence.
                if len(self.train_set['items_unobserved'][user]) == 0:
                    continue

                i, j = self.sample_pair(user)
                self.update_factors(self.user_to_user_id[user], self.item_to_item_id[i], self.item_to_item_id[j])

    def create_factors(self):
        """
        This method creates the factors for users and items (drawn from a normal distribution with the configured
        mean and standard deviation) and the item bias vector (all zeros).

        """

        self.p = np.random.normal(self.init_mean, self.init_stdev, (len(self.users), self.factors))
        self.q = np.random.normal(self.init_mean, self.init_stdev, (len(self.items), self.factors))
        self.bias = np.zeros(len(self.items), np.double)

    def sample_pair(self, user):
        """
        Randomly selects a known and unknown item to a particular user.

        :param user: User to generate pairs (key of the train set dictionaries)
        :type user: int

        :return: known item, unknown item

        """

        known_items = list(self.train_set['items_seen_by_user'][user])
        unknown_items = self.train_set['items_unobserved'][user]
        return random.choice(known_items), random.choice(unknown_items)

    def predict_score(self, user, item):
        """
        Method to predict a single score for a pair (user, item): the dot product of their factor vectors
        (the item bias is not included).

        :param user: User ID (row index in the user factor matrix)
        :type user: int

        :param item: Item ID (row index in the item factor matrix)
        :type item: int

        :return: Score generated for the pair (user, item)
        :rtype: float

        """

        return np.dot(self.p[user], self.q[item])

    def update_factors(self, u, i, j):
        """
        Update biases and latent factors for one sampled triple according to the stochastic gradient update rule

        :param u: User ID for update
        :type u: int

        :param i: Known Item ID
        :type i: int

        :param j: Unknown Item ID
        :type j: int
        """

        # Compute Difference
        x_uij = self.bias[i] - self.bias[j] + (self.predict_score(u, i) - self.predict_score(u, j))
        eps = 1 / (1 + np.exp(x_uij))

        self.bias[i] += self.learn_rate * (eps - self.reg_bias * self.bias[i])
        self.bias[j] += self.learn_rate * (-eps - self.reg_bias * self.bias[j])

        # Row indexing returns views into p and q, not copies
        u_f = self.p[u]
        i_f = self.q[i]
        j_f = self.q[j]

        # Compute every step from the pre-update values BEFORE applying any of them. Applying them one by
        # one would let the in-place change of p[u] leak (through the view u_f) into the item updates.
        step_u = self.learn_rate * ((i_f - j_f) * eps - self.reg_u * u_f)
        step_i = self.learn_rate * (u_f * eps - self.reg_i * i_f)
        step_j = self.learn_rate * (-u_f * eps - self.reg_j * j_f)

        # Apply factor updates
        self.p[u] += step_u
        self.q[i] += step_i
        self.q[j] += step_j

    def predict(self):
        """
        This method predicts the final result, building a rank for each user: items are visited from the highest
        to the lowest score and the ones not seen by the user in the train set are appended to ``self.ranking``
        as (user, item, score) until ``rank_length`` entries were collected.

        """

        w = self.bias.T + np.dot(self.p, self.q.T)
        seen_by_user = self.train_set['items_seen_by_user']

        for u, user in enumerate(self.users):
            scores = w[u]
            # Users without training data default to "has seen everything", so nothing is recommended to them.
            seen = seen_by_user.get(user, self.items)
            partial_ranking = list()

            # Stable sort on the negated scores: descending order, ties keep ascending item index
            candidate_items = np.argsort(-scores, kind='stable').tolist()

            for i in candidate_items:
                item = self.item_id_to_item[i]

                if item not in seen:
                    partial_ranking.append((user, item, scores[i]))

                if len(partial_ranking) == self.rank_length:
                    break

            self.ranking += partial_ranking

    def compute(self, verbose=True, metrics=None, verbose_evaluation=True, as_table=False, table_sep='\t'):
        """
        Extends compute method from BaseItemRecommendation. Method to run recommender algorithm: initializes the
        model, trains it, builds the rankings, writes them and, when a test file was given, evaluates them.

        :param verbose: Print recommender and database information
        :type verbose: bool, default True

        :param metrics: List of evaluation measures
        :type metrics: list, default None

        :param verbose_evaluation: Print the evaluation results
        :type verbose_evaluation: bool, default True

        :param as_table: Print the evaluation results as table
        :type as_table: bool, default False

        :param table_sep: Delimiter for print results (only work with verbose=True and as_table=True)
        :type table_sep: str, default '\t'

        """

        super(BprMF, self).compute(verbose=verbose)

        self.init_model()

        if verbose:
            # timed() presumably runs the callable and returns the elapsed seconds (defined outside this file)
            print("training_time:: %4f sec" % timed(self.fit))
            if self.extra_info_header is not None:
                print(self.extra_info_header)

            print("prediction_time:: %4f sec" % timed(self.predict))

            print('\n')

        else:
            # Execute all in silence without prints
            self.fit()
            self.predict()

        self.write_ranking()

        if self.test_file is not None:
            self.evaluate(metrics, verbose_evaluation, as_table=as_table, table_sep=table_sep)
